####################################################
####          Replication Code for              ####
####     Bayesian versus maximum likelihood     ####
####      estimation of treatment effects       ####
####          in bivariate probit               ####  
####       instrumentalvariable models          ####    
####################################################
####                                            ####  
####                3/ 7/2017                   ####         
####     this file runs rcode to create Figures ####  
####              in  Appendix E                ####  
####################################################


#### recreate simulation parameters
N_seq = c(50, 100, 250, 500)
cor = c(0.25,0.5,0.75)
IV_strength = c(0.5, 1, 1.5, 2)
treatment = c(0, 1, 2, 3, 4)
simulation_para = data.frame(do.call(rbind,replicate(500,as.matrix(expand.grid(N_seq, cor, IV_strength, treatment)),simplify = F)))
names(simulation_para) = c("N","cor","IVcoef","treatment")
simulation_para = simulation_para[order(simulation_para$N, simulation_para$cor, simulation_para$IVcoef, simulation_para$treatment),]
simulation_para$iteration = seq(1,120000)



### load Bayesian model results with prior (0, 2.5) and Stata output
load("R_output25.rda")
load("stata_output.rda")
### stata bootstrapped CIs for ATE
load("late_estimates.rda")
names(late_est)[1] = "iteration"
### 2SLS output
load("sls_output.rda")


##combine stata and bayes results by iteration
##stata
stata = data.frame(stata_output[,c("iteration", "LATE")])
names(stata) = c("iteration", "StataLAte")
stata = join(stata, late_est[, c("iteration", "Lower","Upper")], by = "iteration")
names(stata) <- c("iteration", "StataLate", "Stata.Low95", "Stata.High95")

## bayes
Bayes_res = data.frame(t(R_output25["late",c("mean","median", "2.5%","97.5%", "TRUE"),]), (R_output25["iteration","Rhat/Misc",]))
names(Bayes_res) = c("BayesMean","BayesMedian", "BayesLow95","BayesHigh95", "TrueLate","iteration")

##stata sls
sls_output$sls.Low95 = sls_output$LATE - (qnorm(0.975)*sls_output$LATE.se)
sls_output$sls.High95 = sls_output$LATE + (qnorm(0.975)*sls_output$LATE.se)
sls = data.frame(sls_output[,c("iteration", "LATE", "sls.High95", "sls.Low95")])
names(sls) = c("iteration", "slsLate", "sls.High95", "sls.Low95")



sim_results = join(Bayes_res, stata, by = "iteration")
sim_results = join(sim_results, sls, by = "iteration")
sim_results = join(sim_results, simulation_para, by = "iteration")

#### check for any simulations for stata for which we couldn't calculate the CI
dim(sim_results[is.na(sim_results$Stata.High95) == T & is.na(sim_results$StataLate)==F,])#
dim(sim_results[is.na(sim_results$Stata.High95) == T & is.na(sim_results$StataLate)==F,])#

sim_results$StataLate = ifelse(is.na(sim_results$Stata.High95) == T, NA, sim_results$StataLate)




### create indicator whether 95CI includes true value
sim_results$Bayes95Contain.25 = ifelse(sim_results$TrueLate <= sim_results$BayesHigh95 & sim_results$TrueLate >= sim_results$BayesLow95, 1, 0)
sim_results$Stata95Contain = ifelse(sim_results$TrueLate <= sim_results$Stata.High95 & sim_results$TrueLate >= sim_results$Stata.Low95, 1, 0)
sim_results$sls95Contain = ifelse(sim_results$TrueLate <= sim_results$sls.High95  & sim_results$TrueLate >= sim_results$sls.Low95, 1, 0)

### calculate errors
sim_results$Stata_abs_error = abs(sim_results$StataLate - sim_results$TrueLate)
sim_results$Bayes_abs_error = abs(sim_results$BayesMedian - sim_results$TrueLate)
sim_results$sls_abs_error = abs(sim_results$slsLate - sim_results$TrueLate)

#### what if abs error late > 2, drop those
dim(sim_results[sim_results$sls_abs_error > 2,])

sim_results$slsLate = ifelse(sim_results$sls_abs_error > 2, NA, sim_results$sls_abs_error)



### analyze results
#### by N and IV strength
N = c(50, 100, 250, 500)
IVs =  c(0.5, 1, 1.5, 2.0)
Bayes_mse = stata_mse = sls_mse =  Bayes_mae = stata_mae = sls_mae = Bayes_pct95 = sls_pct95 = stata_pct95 = n_val = iv_val = NULL

for(i in 1: 4){
    for(j in 1:4){
        Bayes_mse = c(Bayes_mse, mean_sq(sim_results$BayesMedian[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$BayesMedian)==FALSE] , sim_results$TrueLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$BayesMedian)==FALSE]))
        stata_mse =  c(stata_mse , mean_sq(sim_results$StataLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataLate)==FALSE] , sim_results$TrueLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataLate)==FALSE]))
        sls_mse =  c(sls_mse , mean_sq(sim_results$slsLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$slsLate)==FALSE] , sim_results$TrueLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$slsLate)==FALSE]))
        
        
        Bayes_mae =  c(Bayes_mae, MAE(sim_results$BayesMedian[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$BayesMedian)==FALSE] , sim_results$TrueLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$BayesMedian)==FALSE]))
        stata_mae =  c(stata_mae, MAE(sim_results$StataLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataLate)==FALSE] , sim_results$TrueLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataLate)==FALSE]))
        sls_mae =  c(sls_mae, MAE(sim_results$slsLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$slsLate)==FALSE] , sim_results$TrueLate[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$slsLate)==FALSE]))
        
        Bayes_pct95 = c(Bayes_pct95, mean(sim_results$Bayes95Contain[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$BayesMedian)==FALSE]))
        sls_pct95 = c(sls_pct95, mean(sim_results$sls95Contain[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$slsLate)==FALSE]))
        stata_pct95 = c(stata_pct95, mean(sim_results$Stata95Contain[sim_results$N == N[i] & sim_results$IVcoef == IVs[j] & is.na(sim_results$StataLate)==FALSE & is.na(sim_results$Stata.Low95)==FALSE]))
        n_val = c(n_val, N[i])
        iv_val = c(iv_val, IVs[j])
        }
}       




data_plot = data.frame(mse = c(Bayes_mse, stata_mse, sls_mse), c95 = c(Bayes_pct95, stata_pct95, sls_pct95), iv= rep(iv_val,3),n = rep(n_val,3), method = c(rep("Bayes.25",16),rep("Stata",16), rep("2sls", 16)))
### prepare Figure E1 in Appendix

p1 = ggplot(data = data_plot[data_plot$n == 50,],aes(y = mse, x = iv))
p1 = p1 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p1 = p1 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p1 = p1 + labs(title = "N = 50", y="MSE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p1 = p1 + scale_colour_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c("#e41a1c", "#4daf4a", "gray10")) + scale_shape_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c(15, 17, 20)) + scale_linetype_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5", "Stata MLE"), values = c(1, 3, 6))
p1 = p1 + theme(legend.position = c(.65, .75), legend.direction = "horizontal", legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 14), legend.title = element_text(size = 14), panel.border = element_rect(colour = "black", fill=NA, size=3.5))# + scale_y_continuous(limits = c(0, 1))
plot(p1)
leg = g_legend(p1)
p1 = p1 + theme(legend.position="none")


p2 = ggplot(data = data_plot[data_plot$n == 100,],aes(y = mse, x = iv))
p2 = p2 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p2 = p2 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p2 = p2 + labs(title = "N = 100", y="MSE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p2 = p2 + scale_colour_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c("#e41a1c", "#4daf4a", "gray10")) + scale_shape_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5", "Stata MLE"), values = c(15, 17, 20)) + scale_linetype_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c(1, 3, 6))
p2 = p2 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
plot(p2)


p3 = ggplot(data = data_plot[data_plot$n == 250,],aes(y = mse, x = iv))
p3 = p3 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p3 = p3 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p3 = p3 + labs(title = "N = 250", y="MSE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p3 = p3 + scale_colour_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c("#e41a1c", "#4daf4a", "gray10")) + scale_shape_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5", "Stata MLE"), values = c(15, 17, 20)) + scale_linetype_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c(1, 3, 6))
p3 = p3 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
plot(p3)

p4 = ggplot(data = data_plot[data_plot$n == 500,],aes(y = mse, x = iv))
p4 = p4 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p4 = p4 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p4 = p4 + labs(title = "N = 500", y="MSE", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p4 = p4 + scale_colour_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c("#e41a1c", "#4daf4a", "gray10")) + scale_shape_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5", "Stata MLE"), values = c(15, 17, 20)) + scale_linetype_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c(1, 3, 6))
p4 = p4 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
plot(p4)

pdf("Figure_E1.pdf", width = 12, height = 12 )
grid.newpage()
pushViewport(viewport(layout = grid.layout(4,3, heights = unit(c(11,1, 11, 2), "null"), width = unit(c(11,1, 11), "null"))))   
print(p1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))    
print(p2, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))         
print(p3, vp = viewport(layout.pos.row = 3, layout.pos.col = 1))         
print(p4, vp = viewport(layout.pos.row = 3, layout.pos.col = 3))
leg$vp = viewport(layout.pos.row = 4, layout.pos.col = c(1:3))
grid.draw(leg) 
dev.off()

### now plot 95% coverage
### Figure E2 in Appendix
p1 = ggplot(data = data_plot[data_plot$n == 50,],aes(y = c95, x = iv))
p1 = p1 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p1 = p1 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p1 = p1 + labs(title = "N = 50", y="95% Interval Coverage", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p1 = p1 + scale_colour_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c("#e41a1c", "#4daf4a", "gray10")) + scale_shape_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5", "Stata MLE"), values = c(15, 17, 20)) + scale_linetype_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c(1, 3, 6))
p1 = p1 + geom_hline(yintercept = 0.95, colour = I("RED"),  size = 1)
p1 = p1 + geom_hline(yintercept = 1, colour = I("BLUE"), size = 1) + scale_y_continuous(limits = c(0.7, 1.005), breaks = c(0.5,0.6,0.7,0.8,0.9,0.95,1.0),labels = c("0.5","0.6","0.7","0.8","0.9","0.95","1.0"))
p1 = p1 + theme(legend.position = c(.65, .75), legend.direction = "horizontal", legend.key = element_blank(), legend.background = element_rect(colour = "white"), legend.text = element_text(size = 14), legend.title = element_text(size = 14), panel.border = element_rect(colour = "black", fill=NA, size=3.5))# + scale_y_continuous(limits = c(0, 1))
plot(p1)
leg = g_legend(p1)
p1 = p1 + theme(legend.position="none")


p2 = ggplot(data = data_plot[data_plot$n == 100,],aes(y = c95, x = iv))
p2 = p2 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p2 = p2 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p2 = p2 + labs(title = "N = 100", y="95% Interval Coverage", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p2 = p2 + scale_colour_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c("#e41a1c", "#4daf4a", "gray10")) + scale_shape_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5", "Stata MLE"), values = c(15, 17, 20)) + scale_linetype_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c(1, 3, 6))
p2 = p2 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
p2 = p2 + geom_hline(yintercept = 0.95, colour = I("RED"), size = 1)
p2 = p2 + geom_hline(yintercept = 1, colour = I("BLUE"),  size = 1) + scale_y_continuous(limits = c(0.7, 1.005), breaks = c(0.5,0.6,0.7,0.8,0.9,0.95,1.0),labels = c("0.5","0.6","0.7","0.8","0.9","0.95","1.0"))
plot(p2)


p3 = ggplot(data = data_plot[data_plot$n == 250,],aes(y = c95, x = iv))
p3 = p3 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p3 = p3 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p3 = p3 + labs(title = "N = 250", y="95% Interval Coverage", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p3 = p3 + scale_colour_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c("#e41a1c", "#4daf4a", "gray10")) + scale_shape_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5", "Stata MLE"), values = c(15, 17, 20)) + scale_linetype_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c(1, 3, 6))
p3 = p3 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
p3 = p3 + geom_hline(yintercept = 0.95, colour = I("RED"), size = 1)
p3 = p3 + geom_hline(yintercept = 1, colour = I("BLUE"), size = 1) + scale_y_continuous(limits = c(0.6, 1.005), breaks = c(0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95,1.0),labels = c("0.3","0.4","0.5","0.6","0.7","0.8","0.9","0.95","1.0"))
plot(p3)

p4 = ggplot(data = data_plot[data_plot$n == 500,],aes(y = c95, x = iv))
p4 = p4 + geom_point(aes(colour = method, shape = method, linetype = method), size = 5) + geom_line(aes(colour = method, linetype = method, shape = method),size = 2.5)
p4 = p4 + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
p4 = p4 + labs(title = "N = 500", y="95% Interval Coverage", x = "Instrument Strength")+ theme(axis.text.x=element_text(size=20)) + theme(axis.text.y=element_text(size=20))+ theme(axis.title.x = element_text(size = 20),axis.title.y = element_text(size = 20), title = element_text(size = 25))
p4 = p4 + scale_colour_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c("#e41a1c", "#4daf4a", "gray10")) + scale_shape_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5", "Stata MLE"), values = c(15, 17, 20)) + scale_linetype_manual(name = "Estimation Method", labels = c("2SLS", "Bayes, Prior SD = 2.5","Stata MLE"), values = c(1, 3, 6))
p4 = p4 + theme(legend.position="none") + theme(panel.border = element_rect(colour = "black", fill=NA, size=3.5))
p4 = p4 + geom_hline(yintercept = 0.95, colour = I("RED"), size = 1)
p4 = p4 + geom_hline(yintercept = 1, colour = I("BLUE"), size = 1) + scale_y_continuous(limits = c(0.6, 1.005), breaks = c(0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95,1.0),labels = c("0.3","0.4","0.5","0.6","0.7","0.8","0.9","0.95","1.0"))
plot(p4)



pdf("Figure_E2.pdf", width = 12, height = 12 )
grid.newpage()
pushViewport(viewport(layout = grid.layout(4,3, heights = unit(c(11,1, 11, 2), "null"), width = unit(c(11,1, 11), "null"))))   
print(p1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))    
print(p2, vp = viewport(layout.pos.row = 1, layout.pos.col = 3))         
print(p3, vp = viewport(layout.pos.row = 3, layout.pos.col = 1))         
print(p4, vp = viewport(layout.pos.row = 3, layout.pos.col = 3))
leg$vp = viewport(layout.pos.row = 4, layout.pos.col = c(1:3))
grid.draw(leg) 
dev.off()
 


